{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "46360dcf",
   "metadata": {},
   "source": [
    "## 1. word2vec模型"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "029f6b08",
   "metadata": {},
   "source": [
    "### 1.1 代码包引入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "830bfd74",
   "metadata": {},
   "outputs": [],
   "source": [
    "from gensim.models.word2vec import Word2Vec\n",
    "import gensim.downloader"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56b47102",
   "metadata": {},
   "source": [
    "### 1.2 引入Word2vec模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "31412a6f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['fasttext-wiki-news-subwords-300',\n",
       " 'conceptnet-numberbatch-17-06-300',\n",
       " 'word2vec-ruscorpora-300',\n",
       " 'word2vec-google-news-300',\n",
       " 'glove-wiki-gigaword-50',\n",
       " 'glove-wiki-gigaword-100',\n",
       " 'glove-wiki-gigaword-200',\n",
       " 'glove-wiki-gigaword-300',\n",
       " 'glove-twitter-25',\n",
       " 'glove-twitter-50',\n",
       " 'glove-twitter-100',\n",
       " 'glove-twitter-200',\n",
       " '__testing_word2vec-matrix-synopsis']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(gensim.downloader.info()['models'].keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8cd2ea1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "word_vectors = gensim.downloader.load('word2vec-google-news-300')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8d126a5",
   "metadata": {},
   "source": [
    "### 1.3 训练word2vec模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3afabe2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "sentences = [['猫','吃','鱼'],['狗','吃','肉']] # 构造数据集\n",
    "model = Word2Vec(sentences, min_count=1, sg=1) #训练模型\n",
    "model_path = 'model/demo.model'\n",
    "model.save(model_path) # 保存模型"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51b453d5",
   "metadata": {},
   "source": [
    "### 1.4 词向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "37152b9e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.00713902,  0.00124103, -0.00717672, -0.00224462,  0.0037193 ,\n",
       "        0.00583312,  0.00119818,  0.00210273, -0.00411039,  0.00722533,\n",
       "       -0.00630704,  0.00464722, -0.00821997,  0.00203647, -0.00497705,\n",
       "       -0.00424769, -0.00310898,  0.00565521,  0.0057984 , -0.00497465,\n",
       "        0.00077333, -0.00849578,  0.00780981,  0.00925729, -0.00274233,\n",
       "        0.00080022,  0.00074665,  0.00547788, -0.00860608,  0.00058446,\n",
       "        0.00686942,  0.00223159,  0.00112468, -0.00932216,  0.00848237,\n",
       "       -0.00626413, -0.00299237,  0.00349379, -0.00077263,  0.00141129,\n",
       "        0.00178199, -0.0068289 , -0.00972481,  0.00904058,  0.00619805,\n",
       "       -0.00691293,  0.00340348,  0.00020606,  0.00475375, -0.00711994,\n",
       "        0.00402695,  0.00434743,  0.00995737, -0.00447374, -0.00138926,\n",
       "       -0.00731732, -0.00969783, -0.00908026, -0.00102275, -0.00650329,\n",
       "        0.00484973, -0.00616403,  0.00251919,  0.00073944, -0.00339215,\n",
       "       -0.00097922,  0.00997913,  0.00914589, -0.00446183,  0.00908303,\n",
       "       -0.00564176,  0.00593092, -0.00309722,  0.00343175,  0.00301723,\n",
       "        0.00690046, -0.00237388,  0.00877504,  0.00758943, -0.00954765,\n",
       "       -0.00800821, -0.0076379 ,  0.00292326, -0.00279472, -0.00692952,\n",
       "       -0.00812826,  0.00830918,  0.00199049, -0.00932802, -0.00479272,\n",
       "        0.00313674, -0.00471321,  0.00528084, -0.00423344,  0.0026418 ,\n",
       "       -0.00804569,  0.00620989,  0.00481889,  0.00078719,  0.00301345],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = Word2Vec.load(model_path) # 加载模型\n",
    "model.wv['猫'] # 输出词向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c1ce806",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
